### import datadf_raw<-read.csv(file=file.path(path,'data','raw_data','outcome.csv'))#### Clean data source("preprocess.R")df<-preprocess_data(df_raw)# items to use items_to_use<-'PDSS.SR'# options are #MADRS.S, LSAS.SR, PDSS.SRitemlabels<-itemlabels%>%as_tibble()%>%filter(str_detect(itemnr, items_to_use))# DIF+aux variables to use # DIF not used 'marital_status','children', 'working'dif_variables<-c('Patient','Treatment','sex','age','TreatmentAccessStart','education')### Make a backup of the dataframe, in case you need to revert changes at some pointdf.all<-dfprint(itemlabels)
As we see here PDSS.SR is not used for all conditions over the course of their treatments. However during screening they are. We start by showing all timepoints, to draw targeting plots.
3.1 Remove missing
Code
##### Before filtering out participants, you should check the missing data structure using RImissing() and RImissingP()# If you want to include participants with missing data, input the minimum number of items responses that a participant should have to be included in the analysis:min.responses<-4# In our case if they have missing on one item all items for that time are missing. df_save<-df# Select the variables we will work with, and filter out respondents with missing datadf<-df%>%select(all_of(c(dif_variables,"Time",itemlabels$itemnr)))%>%# vfilter(rowSums(is.na(select(.,all_of(itemlabels$itemnr))))<min.responses)
4 Create DIF variables
Code
#---- Create DIF variables----# DIF variables into vectors, recoded as factors since DIF functions need this# these could also be stored in its own dataframe (not a tibble) instead of as vectors# Named vector for the new typestype_transform<-c(Treatment ="factor", sex ="factor", age ="numeric", TreatmentAccessStart ="integer",education="factor",Time='factor')# Transform columns based on the named vectordif<-df%>%mutate(across(names(type_transform), ~switch(type_transform[which(names(type_transform)==cur_column())], "integer"=as.integer(.), "character"=as.character(.),"factor"=as.factor(.), "numeric"=as.numeric(.))))%>%as.data.frame(.)%>%select(!all_of(itemlabels$itemnr))# then remove them from dataframe, since we need a dataframe with only item data for the Rasch analysesdf<-df%>%select(all_of(c("Time",itemlabels$itemnr)))# add time here ? dfnotime<-df%>%select(!Time)source("RISE_theme.R")
5 Demographics descriptives
Code
dif_spec<-dif%>%filter(Time=='SCREEN')%>%select(!all_of(c("Time","Patient")))summary(dif_spec)# RIdemographics(dif_spec,'Treatment') <- function crashes TODO make own
Treatment sex age TreatmentAccessStart education
MDD:2853 F:3834 Min. :18.00 Min. :2008 Primary : 484
PD :1592 M:2366 1st Qu.:27.00 1st Qu.:2012 Secondary :3024
SAD:1755 Median :33.00 Median :2014 Postsecondary:2692
Mean :35.24 Mean :2014
3rd Qu.:42.00 3rd Qu.:2017
Max. :84.00 Max. :2019
6 Overall number of responses
Code
# Collapsed RIallresp(dfnotime)# Seperate RIallresp_over_times<-df%>%# split(.$Time)%>%# split the data map(~RIallresp(.x%>%dplyr::select(!Time)))#+ labs(title = .x$Time)) # create separate for each time# make nice later RI_allresp_kable_grid=combine_kables_grid(RIallresp_over_times,cols=3)RI_allresp_kable_grid
Response category
Number of responses
Percent
0
48333
31.8
1
54744
36.1
2
34135
22.5
3
12359
8.1
4
2182
1.4
x
SCREEN.Response category
SCREEN.Number of responses
SCREEN.Percent
PRE.Response category
PRE.Number of responses
PRE.Percent
WEEK01.Response category
WEEK01.Number of responses
WEEK01.Percent
0
16055
37.0
0
1661
14.1
0
1915
19.8
1
10996
25.3
1
3725
31.7
1
3879
40.1
2
10206
23.5
2
4114
35.0
2
2868
29.6
3
5057
11.7
3
1921
16.4
3
904
9.3
4
1086
2.5
4
325
2.8
4
115
1.2
WEEK02.Response category
WEEK02.Number of responses
WEEK02.Percent
WEEK03.Response category
WEEK03.Number of responses
WEEK03.Percent
WEEK04.Response category
WEEK04.Number of responses
WEEK04.Percent
0
2114
21.6
0
2413
24.7
0
2649
28.0
1
4268
43.6
1
4262
43.6
1
4041
42.8
2
2545
26.0
2
2355
24.1
2
2093
22.1
3
768
7.8
3
665
6.8
3
573
6.1
4
98
1.0
4
70
0.7
4
94
1.0
WEEK05.Response category
WEEK05.Number of responses
WEEK05.Percent
WEEK06.Response category
WEEK06.Number of responses
WEEK06.Percent
WEEK07.Response category
WEEK07.Number of responses
WEEK07.Percent
0
2679
29.6
0
2903
33.3
0
2841
34.1
1
3911
43.2
1
3656
41.9
1
3482
41.8
2
1887
20.8
2
1669
19.1
2
1540
18.5
3
487
5.4
3
425
4.9
3
397
4.8
4
87
1.0
4
76
0.9
4
70
0.8
WEEK08.Response category
WEEK08.Number of responses
WEEK08.Percent
WEEK09.Response category
WEEK09.Number of responses
WEEK09.Percent
WEEK10.Response category
WEEK10.Number of responses
WEEK10.Percent
0
3020
37.8
0
2962
38.8
0
3027
42.3
1
3273
40.9
1
3082
40.4
1
2842
39.7
2
1348
16.9
2
1230
16.1
2
1042
14.6
3
313
3.9
3
325
4.3
3
220
3.1
4
40
0.5
4
31
0.4
4
30
0.4
POST.Response category
POST.Number of responses
POST.Percent
" "
" "
0
4094
45.4
1
3327
36.9
2
1238
13.7
3
304
3.4
4
60
0.7
7 Descriptives of raw data
Response distribution for all items are summarized below.
#df_test = df %>% filter(Time=='SCREEN') %>% select(!Time)# Across time # seperated time - might need to fix this remake in ggplot?png(filename=paste0("./plots/",itemStart,"_rawdist_over_time.png"),width=800,height=2400)par(mfrow=c(7,2),mar =c(4, 4, 4, 2))# c(5 <- bottom lines, 4, 4, 2)RIrawdist_over_times<-df%>%# split(.$Time)%>%# split the data imap(~{RIrawdist(.x%>%dplyr::select(!Time))mtext(paste("Time:", .y), side =1, line =3, adj =0, cex =0.8)})dev.off()
The eRm package, which uses Conditional Maximum Likelihood (CML) estimation, will be used primarily. For this analysis, the Partial Credit Model will be used.
8.1 Timepoint decision
Due to the amount of data across timepoints targeting will first be inspected, and the best fitting timepoint will be chosen based on targeting, which in turn will inform the rest of the analyses.
8.1.1 Targeting
Code
# increase fig-height above as needed, if you have many items og. value was 5. #RItargeting(dfnotime)require(patchwork)targeting_time_plots<-ggplots_with_spec_func_over_time(df,func_call =RItargeting,title_all='', blankx=FALSE,colsplot =3,rowsplot=5,return_list_plots=TRUE)good_layout_targeting_plots<-wrap_plots(targeting_time_plots, ncol =3)+plot_layout(guides="collect")#print(good_layout_targeting_plots)ggsave( filename =paste0("./plots/",items_to_use,"_targeting_over_time.png"), plot =good_layout_targeting_plots, width =20, # Increase width (in inches) height =45, # Increase height (in inches) dpi =300# High resolution)print(good_layout_targeting_plots)
Pre has very good targeting looks promising. We also note that this is only for patient in treatment for panic disorder (no other treatment-conditions are mixed in PRE). We doublecheck how much data we have for each threshold. (Recomm. 20+)
Item Var gamma se pvalue padj.BH sig lower upper
1 PDSS.SR_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 PDSS.SR_2 grouping_based_on_score NaN NaN NA NA ? NA NA
3 PDSS.SR_3 grouping_based_on_score NaN NaN NA NA ? NA NA
4 PDSS.SR_4 grouping_based_on_score NaN NaN NA NA ? NA NA
5 PDSS.SR_5 grouping_based_on_score NaN NaN NA NA ? NA NA
6 PDSS.SR_6 grouping_based_on_score NaN NaN NA NA ? NA NA
7 PDSS.SR_7 grouping_based_on_score NaN NaN NA NA ? NA NA
From conditional item fit item 2 and 5 are flagged. However Item 2 have large residual correlations with 1 (possibly also seen at the contrast loadings), however item 5 is most different from the score groups. Removing item 5 due to the underfit.
Item Var gamma se pvalue padj.BH sig lower upper
1 PDSS.SR_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 PDSS.SR_2 grouping_based_on_score NaN NaN NA NA ? NA NA
3 PDSS.SR_3 grouping_based_on_score NaN NaN NA NA ? NA NA
4 PDSS.SR_4 grouping_based_on_score NaN NaN NA NA ? NA NA
5 PDSS.SR_6 grouping_based_on_score NaN NaN NA NA ? NA NA
6 PDSS.SR_7 grouping_based_on_score NaN NaN NA NA ? NA NA
Item Var gamma se pvalue padj.BH sig lower upper
1 PDSS.SR_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 PDSS.SR_3 grouping_based_on_score NaN NaN NA NA ? NA NA
3 PDSS.SR_4 grouping_based_on_score NaN NaN NA NA ? NA NA
4 PDSS.SR_6 grouping_based_on_score NaN NaN NA NA ? NA NA
5 PDSS.SR_7 grouping_based_on_score NaN NaN NA NA ? NA NA
The only really underperforming item is item 7. With the overfit indicated by the boostrap and the association / local dependence between item 4 and 7 as indicated by the partial gamma.
Item Var gamma se pvalue padj.BH sig lower upper
1 PDSS.SR_1 grouping_based_on_score NaN NaN NA NA ? NA NA
2 PDSS.SR_3 grouping_based_on_score NaN NaN NA NA ? NA NA
3 PDSS.SR_4 grouping_based_on_score NaN NaN NA NA ? NA NA
4 PDSS.SR_6 grouping_based_on_score NaN NaN NA NA ? NA NA
pfit_u3poly<-PerFit::U3poly(matrix =df_pre, Ncat =5, # make sure to input number of response categories, not thresholds IRT.PModel ="PCM")misfits<-PerFit::flagged.resp(pfit_u3poly)%>%pluck("Scores")%>%as.data.frame()%>%pull(FlaggedID)misfits2<-RIpfit(df_pre, output ="rowid")
The ordinal sum to interval score contains the information to transform into the below thetas, but we plot them here for convinience (based on 4 items)
Code
est_thetas2<-RIestThetas(df_pre, method ="WLE")hist(est_thetas2$WLE, col ="#009ca6", main ="Histogram of person locations (thetas)", breaks =10)
---title: "Rasch analysis of outcomes"subtitle: "PDSS-SR"title-block-banner: "#4F0433" title-block-banner-color: "#FFFFFF"author: - name: Nils Hentati Isacsson affiliation: Centre for Psychiatry Research, Department of Clinical Neuroscience, Karolinska Institutet, & Stockholm Health Care Services, Region Stockholm, Sweden affiliation-url: orcid: 0000-0002-5749-5310 - name: 'Magnus Johansson' affiliation: 'RISE Research Institutes of Sweden' affiliation-url: 'https://ri.se/en/shic' orcid: '0000-0003-1669-592X'date: last-modifieddate-format: isoalways_allow_html: trueformat: html: toc: true toc-depth: 3 toc-title: "Table of contents" embed-resources: true standalone: true page-layout: full mainfont: 'Lato' monofont: 'Roboto Mono' code-overflow: wrap code-fold: true code-tools: true code-link: true number-sections: true fig-dpi: 96 layout-align: left linestretch: 1.6 theme: - materia - custom.scss css: styles.css license: CC BY pdf: papersize: a4 documentclass: report execute: echo: true warning: false message: false cache: falseeditor_options: markdown: wrap: 72 chunk_output_type: console---# Setup ```{r}#| label: setuplibrary(easyRasch) # devtools::install_github("pgmj/easyRasch")#library(RISEkbmRasch)library(grateful)library(ggrepel)library(car)library(kableExtra)library(tidyverse)library(eRm)library(iarm)library(mirt)library(psych)library(psychotree)library(matrixStats)library(reshape)library(knitr)library(patchwork)library(formattable) library(glue)### optional libraries#library(TAM)#library(skimr)#library(janitor)# 2300610# nohup quarto render PDSSSR.qmd --to html &> render_pdsssr.out & ### some commands exist in multiple packages, here we define preferred ones that are frequently usedselect <- dplyr::selectcount <- dplyr::countrecode <- car::recoderename <- dplyr::renamepath_prim <-'/srv/projects/nils/study4rasch'path2 <-'/Volumes/projects/k8_CPF_Kaldo/Data/Lärande Maskiner/Nils/Study4data'running_machine =Sys.info()[['sysname']]path =ifelse(running_machine!='Linux',path2,path_prim)set_mywd_path =ifelse(running_machine!='Linux','./study4rasch/rasch_measures','./rasch_measures')n_cores =ifelse(running_machine!='Linux',1,8)tryCatch({setwd(set_mywd_path)}, error =function(set_mywd_path){print('already in right directory')})source('settings.R') # containing item labels source('mod_easyRasch_func.R') # modified functions + added ```# Importing data```{r, import data}### import datadf_raw <-read.csv(file=file.path(path,'data','raw_data','outcome.csv')) #### Clean data source("preprocess.R")df <-preprocess_data(df_raw)# items to use items_to_use <-'PDSS.SR'# options are #MADRS.S, LSAS.SR, PDSS.SRitemlabels <- itemlabels %>%as_tibble() %>%filter(str_detect(itemnr, items_to_use))# DIF+aux variables to use # DIF not used 'marital_status','children', 'working'dif_variables <-c('Patient','Treatment','sex','age','TreatmentAccessStart','education')### Make a backup of the dataframe, in case you need to revert changes at some pointdf.all <- dfprint(itemlabels)```# Checking missing ```{r,missing numbers}#| label: MissingitemStart <- items_to_useout =RImissing_mod(df,itemStart,facet="Time")print(out)```As we see here PDSS.SR is not used for all conditions over the course of their treatments. However during screening they are. We start by showing all timepoints, to draw targeting plots.## Remove missing ```{r, filter missing}##### Before filtering out participants, you should check the missing data structure using RImissing() and RImissingP()# If you want to include participants with missing data, input the minimum number of items responses that a participant should have to be included in the analysis:min.responses <-4# In our case if they have missing on one item all items for that time are missing. df_save <- df# Select the variables we will work with, and filter out respondents with missing datadf <- df %>%select(all_of(c(dif_variables,"Time",itemlabels$itemnr))) %>%# vfilter(rowSums(is.na(select(.,all_of(itemlabels$itemnr)))) < min.responses) ```# Create DIF variables ```{r, dif creation}#---- Create DIF variables----# DIF variables into vectors, recoded as factors since DIF functions need this# these could also be stored in its own dataframe (not a tibble) instead of as vectors# Named vector for the new typestype_transform <-c(Treatment ="factor", sex ="factor", age ="numeric",TreatmentAccessStart ="integer",education="factor",Time='factor')# Transform columns based on the named vectordif <- df %>%mutate(across(names(type_transform), ~switch(type_transform[which(names(type_transform) ==cur_column())], "integer"=as.integer(.), "character"=as.character(.),"factor"=as.factor(.), "numeric"=as.numeric(.)))) %>%as.data.frame(.) %>%select(!all_of(itemlabels$itemnr))# then remove them from dataframe, since we need a dataframe with only item data for the Rasch analysesdf <- df %>%select(all_of(c("Time",itemlabels$itemnr))) # add time here ? dfnotime <- df %>%select(!Time)source("RISE_theme.R")```# Demographics descriptives ```{r, demographics}#| layout-ncol: 2dif_spec <- dif %>%filter(Time=='SCREEN') %>%select(!all_of(c("Time","Patient")))summary(dif_spec)# RIdemographics(dif_spec,'Treatment') <- function crashes TODO make own ```# Overall number of responses ```{r, n in each cell}#| layout-ncol: 1#| fig-width: 9# Collapsed RIallresp(dfnotime)# Seperate RIallresp_over_times <- df %>%# split(.$Time) %>%# split the data map(~RIallresp(.x %>% dplyr::select(!Time))) #+ labs(title = .x$Time)) # create separate for each time# make nice later RI_allresp_kable_grid =combine_kables_grid(RIallresp_over_times,cols=3)RI_allresp_kable_grid```# Descriptives of raw dataResponse distribution for all items are summarized below.## Floor/ceiling effects ```{r,floorceiling-all}RIrawdist(dfnotime)``````{r, floorceiling}#| fig-height: 45#| fig-width: 20#df_test = df %>% filter(Time=='SCREEN') %>% select(!Time)# Across time # seperated time - might need to fix this remake in ggplot?png(filename=paste0("./plots/",itemStart,"_rawdist_over_time.png"),width=800,height=2400)par(mfrow=c(7,2),mar =c(4, 4, 4, 2)) # c(5 <- bottom lines, 4, 4, 2)RIrawdist_over_times <- df %>%# split(.$Time) %>%# split the data imap(~ {RIrawdist(.x %>% dplyr::select(!Time))mtext(paste("Time:", .y), side =1, line =3, adj =0, cex =0.8) })dev.off()print('Saved plot to /plots')```## Guttman structure ```{r, guttman}RIheatmap(dfnotime)``````{r, guttman-time}#| fig-height: 45#| fig-width: 20ggplots_with_spec_func_over_time(df,func_call = RIheatmap,title_all='Guttman structure over time',blankx=TRUE)```## Descriptives - item level```{r, listmargins}#| column: margindf_test = df %>%filter(Time=='SCREEN') %>%select(!Time)RIlistItemsMargin(df, fontsize =12)```::: panel-tabset### Tile plot```{r, tileplot}# over all times RItileplot(dfnotime)``````{r, tileplot time}#| fig-height: 45#| fig-width: 20#| # by time ggplots_with_spec_func_over_time(df,func_call = RItileplot,title_all='',blankx=FALSE)```### Stacked bars```{r, bars}RIbarstack(dfnotime)``````{r, bars time}#| fig-height: 45#| fig-width: 20# by time ggplots_with_spec_func_over_time(df,func_call = RIbarstack,title_all='',blankx=FALSE)```### Barplots```{r, barplots}#| layout-ncol: 2RIbarplot(dfnotime)```:::# Rasch analysis 1```{r, listmargins2}#| column: margindf_test = df %>%filter(Time=='SCREEN') %>%select(!Time)RIlistItemsMargin(df, fontsize =12)```The eRm package, which uses Conditional Maximum Likelihood (CML)estimation, will be used primarily. For this analysis, the PartialCredit Model will be used.## Timepoint decision Due to the amount of data across timepoints targeting will first be inspected, and the best fitting timepoint will be chosen based on targeting, which in turn will inform the rest of the analyses. ### Targeting ```{r, targeting}#| fig-height: 45#| fig-width: 20# increase fig-height above as needed, if you have many items og. value was 5. #RItargeting(dfnotime)require(patchwork)targeting_time_plots <-ggplots_with_spec_func_over_time( df,func_call = RItargeting,title_all='',blankx=FALSE,colsplot =3,rowsplot=5,return_list_plots=TRUE)good_layout_targeting_plots <-wrap_plots(targeting_time_plots, ncol =3) +plot_layout(guides="collect")#print(good_layout_targeting_plots)ggsave(filename =paste0("./plots/",items_to_use,"_targeting_over_time.png"),plot = good_layout_targeting_plots,width =20, # Increase width (in inches)height =45, # Increase height (in inches)dpi =300# High resolution)print(good_layout_targeting_plots)```Pre has very good targeting looks promising. We also note that this is only for patient in treatment for panic disorder (no other treatment-conditions are mixed in PRE). We doublecheck how much data we have for each threshold. (Recomm. 20+)### Number of responses per threshhold ```{r, thresh-responses}dataframen4 <- df %>%filter(Time=="PRE") %>%select(!Time)sapply(dataframen4, function(column) table(column))```Based on the above we move forward with the PRE timepoint. ## Analysis continuation PRE```{r,subset_chosen_timepoint}df_pre <- df %>%filter(Time=='PRE') %>%select(!Time) #%>% sample_n(800)```::: panel-tabset### Conditional item fit```{r, conditional item fit}RIitemfit(df_pre, cutoff ="Smith98")simfit1 <-RIgetfit(df_pre, iterations =300, cpu = n_cores) RIitemfit(df_pre, simfit1)RIgetfitPlot(simfit1, df_pre)```### CICC```{r, CICCp1}ICCplot(as.data.frame(df_pre), itemnumber =1:4, method ="cut", itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2}ICCplot(as.data.frame(df_pre), itemnumber =5:7, method ="cut", itemdescrip = itemlabels$itemnr[5:7])```### Item-restscore```{r, normal restscore}RIrestscore(df_pre)```### Item-restscore-bootstrapped```{r,bootstrap restscore}RIbootRestscore(df_pre,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_pre)```### Residual correlations```{r, Residual corr}simcor1 <-RIgetResidCor(df_pre, iterations =500, cpu = n_cores)RIresidcorr(df_pre, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma}RIpartgamLD(df_pre)```### 1st contrast loadings```{r, contrast loadings}RIloadLoc(df_pre)```### Response categories```{r, resp categories}#| fig-height: 7mirt(df_pre, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_pre)```### Item hierarchy```{r, Item hierarchy}#| fig-height: 7RIitemHierarchy(df_pre)```### Score groups```{r, score groups}iarm::score_groups(as.data.frame(df_pre)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_pre %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_pre, model ="PCM")item_obsexp(PCM(df_pre))grouping_based_on_score =score_groups(as.data.frame(df_pre))partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_pre, dif.sex)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.sex) ```### DIF-age```{r, dif-age}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_pre, dif.age)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.age) ```### DIF-education```{r, dif-edu}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_pre, dif.edu)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr1}dif.yr <- dif %>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_pre, dif.yr)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.yr) ```:::## Analysis part 1 decision From conditional item fit item 2 and 5 are flagged. However Item 2 have large residual correlations with 1 (possibly also seen at the contrast loadings), however item 5 is most different from the score groups. Removing item 5 due to the underfit. # Rasch analysis 2Based on the above we remove item 5 ```{r, filter n listmargins3}#| column: margindf_pre <- df %>%filter(Time=='PRE') %>%select(!c(Time,PDSS.SR_5)) #%>% sample_n(800)RIlistItemsMargin(df_pre, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit2}RIitemfit(df_pre, cutoff ="Smith98")simfit1 <-RIgetfit(df_pre, iterations =300, cpu = n_cores) RIitemfit(df_pre, simfit1)RIgetfitPlot(simfit1, df_pre)```### CICC```{r, CICCp1_2}ICCplot(as.data.frame(df_pre), itemnumber =1:4, method ="cut", itemdescrip = itemlabels$itemnr[1:4])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2_2}ICCplot(as.data.frame(df_pre), itemnumber =5:6, method ="cut", itemdescrip = itemlabels$itemnr[5:6])```### Item-restscore```{r, normal restscore2}RIrestscore(df_pre)```### Item-restscore-bootstrapped```{r,bootstrap restscore2}RIbootRestscore(df_pre,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA2}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_pre)```### Residual correlations```{r, Residual corr2}simcor1 <-RIgetResidCor(df_pre, iterations =500, cpu = n_cores)RIresidcorr(df_pre, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma2}RIpartgamLD(df_pre)```### 1st contrast loadings```{r, contrast loadings2}RIloadLoc(df_pre)```### Response categories```{r, resp categories2}#| fig-height: 7mirt(df_pre, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again2}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_pre)```### Item hierarchy```{r, Item hierarchy2}#| fig-height: 7RIitemHierarchy(df_pre)```### Score groups```{r, score groups2}iarm::score_groups(as.data.frame(df_pre)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_pre %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_pre, model ="PCM")item_obsexp(PCM(df_pre))grouping_based_on_score =score_groups(as.data.frame(df_pre))partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex2}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_pre, dif.sex)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.sex) ```### DIF-age```{r, dif-age2}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_pre, dif.age)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.age) ```### DIF-education```{r, dif-edu2}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_pre, dif.edu)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr12}dif.yr <- dif %>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_pre, dif.yr)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.yr) ```:::## Analysis part 2 decision Item 2 displays the most misfit and most residual correlations (with 1 too). # Rasch analysis 3Based on the above we remove item 2```{r, filter n listmargins4}#| column: margindf_pre <- df %>%filter(Time=='PRE') %>%select(!c(Time,PDSS.SR_5,PDSS.SR_2)) #%>% sample_n(800)RIlistItemsMargin(df_pre, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit3}RIitemfit(df_pre, cutoff ="Smith98")simfit1 <-RIgetfit(df_pre, iterations =300, cpu = n_cores) RIitemfit(df_pre, simfit1)RIgetfitPlot(simfit1, df_pre)```### CICC```{r, CICCp1_3}ICCplot(as.data.frame(df_pre), itemnumber =1:4, method ="cut", itemdescrip = itemlabels$itemnr[c(1,3,4,6)])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)``````{r, CICCp2_3}ICCplot(as.data.frame(df_pre), itemnumber =5, method ="cut", itemdescrip = itemlabels$itemnr[7])```### Item-restscore```{r, normal restscore3}RIrestscore(df_pre)```### Item-restscore-bootstrapped```{r,bootstrap restscore3}RIbootRestscore(df_pre,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA3}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_pre)```### Residual correlations```{r, Residual corr3}simcor1 <-RIgetResidCor(df_pre, iterations =500, cpu = n_cores)RIresidcorr(df_pre, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma3}RIpartgamLD(df_pre)```### 1st contrast loadings```{r, contrast loadings3}RIloadLoc(df_pre)```### Response categories```{r, resp categories3}#| fig-height: 7mirt(df_pre, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again3}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_pre)```### Item hierarchy```{r, Item hierarchy3}#| fig-height: 7RIitemHierarchy(df_pre)```### Score groups```{r, score groups3}iarm::score_groups(as.data.frame(df_pre)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_pre %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_pre, model ="PCM")item_obsexp(PCM(df_pre))grouping_based_on_score =score_groups(as.data.frame(df_pre))partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex3}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_pre, dif.sex)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.sex) ```### DIF-age```{r, dif-age3}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_pre, dif.age)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.age) ```### DIF-education```{r, dif-edu3}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_pre, dif.edu)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr13}dif.yr <- dif %>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_pre, dif.yr)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.yr) ```:::## Analysis part 3 decision The only really underperforming item is item 7. With the overfit indicated by the boostrap and the association / local dependence between item 4 and 7 as indicated by the partial gamma. # Rasch analysis 4Based on the above we remove item 7```{r, filter n listmargins5}#| column: margindf_pre <- df %>%filter(Time=='PRE') %>%select(!c(Time,PDSS.SR_5,PDSS.SR_2,PDSS.SR_7)) #%>% sample_n(800)RIlistItemsMargin(df_pre, fontsize =12)```::: panel-tabset### Conditional item fit```{r, conditional item fit5}RIitemfit(df_pre, cutoff ="Smith98")simfit1 <-RIgetfit(df_pre, iterations =300, cpu = n_cores) RIitemfit(df_pre, simfit1)RIgetfitPlot(simfit1, df_pre)```### CICC```{r, CICCp1_6}ICCplot(as.data.frame(df_pre), itemnumber =1:4, method ="cut", itemdescrip = itemlabels$itemnr[c(1,3,4,6)])### also suggested:# library(RASCHplot) # install first with `devtools::install_github("ERRTG/RASCHplot")`# CICCplot(PCM(df),# which.item = c(1:4),# lower.groups = c(0,7,14,21,28,35),# grid.items = TRUE)```### Item-restscore```{r, normal restscore4}RIrestscore(df_pre)```### Item-restscore-bootstrapped```{r,bootstrap restscore4}RIbootRestscore(df_pre,cpu=n_cores,iterations =500,samplesize=800) # samplesize=600```### PCA```{r, PCA4}#| tbl-cap: "PCA of Rasch model residuals"RIpcmPCA(df_pre)```### Residual correlations```{r, Residual corr4}simcor1 <-RIgetResidCor(df_pre, iterations =500, cpu = n_cores)RIresidcorr(df_pre, cutoff = simcor1$p99)```### Partial gamma LD ```{r, partial gamma4}RIpartgamLD(df_pre)```### 1st contrast loadings```{r, contrast loadings4}RIloadLoc(df_pre)```### Response categories```{r, resp categories4}#| fig-height: 7mirt(df_pre, model=1, itemtype='Rasch', verbose =FALSE) %>%plot(type="trace", as.table =TRUE, theta_lim =c(-6,6))# for fewer items or a more magnified figure, use:#RIitemCats(df)```### Targeting```{r, targeting again4}#| fig-height: 7# increase fig-height above as needed, if you have many itemsRItargeting(df_pre)```### Item hierarchy```{r, Item hierarchy4}#| fig-height: 7RIitemHierarchy(df_pre)```### Score groups```{r, score groups4}iarm::score_groups(as.data.frame(df_pre)) %>%as.data.frame(nm ="score_group") %>% dplyr::count(score_group)dif_plots <- df_pre %>%add_column(dif = iarm::score_groups(.)) %>%split(.$dif) %>%# split the data using the DIF variablemap(~RItileplot(.x %>% dplyr::select(!dif)) +labs(title = .x$dif))dif_plots[[1]] + dif_plots[[2]]clr_tests(df_pre, model ="PCM")item_obsexp(PCM(df_pre))grouping_based_on_score =score_groups(as.data.frame(df_pre))partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = grouping_based_on_score) ```### DIF-sex```{r, dif-sex4}dif.sex <- dif %>%filter(Time=='PRE') %>%select(sex) %>%as.vector(.)dif.sex <- dif.sex[[1]] #female = 1, male = 2 RIdifTable(df_pre, dif.sex)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.sex) ```### DIF-age```{r, dif-age4}dif.age <- dif %>%filter(Time=='PRE') %>%select(age) %>%as.vector(.)dif.age <- dif.age[[1]]RIdifTable(df_pre, dif.age)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.age) ```### DIF-education```{r, dif-edu4}dif.edu <- dif %>%filter(Time=='PRE') %>%select(education) %>%as.vector(.)dif.edu <- dif.edu[[1]]RIdifTable(df_pre, dif.edu)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.edu) ```### DIF-Year```{r, dif-yr14}dif.yr <- dif %>%filter(Time=='PRE') %>%select(TreatmentAccessStart) %>%as.vector(.)dif.yr <- dif.yr[[1]]RIdifTable(df_pre, dif.yr)partgam_DIF(dat.items =as.data.frame(df_pre),dat.exo = dif.yr) ```:::## Analysis part 4 decision While item 3 have some small overfit in the boostrap (~15%) it is not so severe. As is the small DIF indications on age. # Analysis conclusion Resulting PDSS-SR reworked scale thus has item 1, 3, 4 and 6. ## Resulting items ```{r,subset_chosen_items conc}df_pre <- df %>%filter(Time=='PRE') %>%select(!c(Time,PDSS.SR_5,PDSS.SR_2,PDSS.SR_7)) #%>% sample_n(800)what_scale <-gsub('\\.','',items_to_use)``````{r, listmargins-subbed conc}#| column: marginRIlistItemsMargin(df_pre, fontsize =12)```## Reliability ```{r, rel}RItif(df_pre)``````{r, rel 2}RItif(df_pre,samplePSI=TRUE)```## Person parameter ```{r, personfit}RIpfit(df_pre)```## Misfit check ```{r}pfit_u3poly <- PerFit::U3poly(matrix = df_pre, Ncat =5, # make sure to input number of response categories, not thresholdsIRT.PModel ="PCM")misfits <- PerFit::flagged.resp(pfit_u3poly) %>%pluck("Scores") %>%as.data.frame() %>%pull(FlaggedID)misfits2 <-RIpfit(df_pre, output ="rowid")```::: panel-tabset### All resp ```{r}RIitemfit(df_pre, simcut = simfit1)```### U3 misfit removed```{r}RIitemfit(df_pre[-misfits,], simcut = simfit1)RItif(df_pre[-misfits,])```### ZSTD removed ```{r}RIitemfit(df_pre[-misfits2,], simcut = simfit1)RItif(df_pre[-misfits2,])```:::## Item parameters```{r, item params}RIitemparams(df_pre)item_param_matrix <-RIitemparams(df_pre,output='dataframe') %>%as.matrix(.)write.csv(item_param_matrix,file=paste0("./results/item_params_",what_scale,'.csv'),row.names=TRUE)```## Ordinal sum to interval score ```{r, RI_score}RIscoreSE(df_pre)RIscoreSE(df_pre,output='figure')sum_to_latent <-RIscoreSE(df_pre,output ='dataframe')write.csv(sum_to_latent,file=paste0('./results/ordinal_sum_to_latent_',what_scale,'.csv'),row.names=FALSE)```## ThetasThe ordinal sum to interval score contains the information to transform into the below thetas, but we plot them here for convinience (based on 4 items)```{r, thetas}est_thetas2 <-RIestThetas(df_pre, method ="WLE")hist(est_thetas2$WLE, col ="#009ca6", main ="Histogram of person locations (thetas)", breaks =10)```# Software used```{r}pkgs <- grateful::cite_packages(cite.tidyverse =TRUE, output ="table",bib.file ="grateful-refs.bib",include.RStudio =FALSE,out.dir =getwd())formattable(pkgs, table.attr ='class=\"table table-striped\" style="font-size: 15px; font-family: Lato; width: 80%"')```# Session info ```{r}sessionInfo()```# References